## Warning: package 'tweedie' was built under R version 3.6.2
## Warning: package 'fitdistrplus' was built under R version 3.6.2
## Warning: package 'MASS' was built under R version 3.6.2
## Warning: package 'survival' was built under R version 3.6.2
# Read the reference data set and fit every candidate distribution to its
# total_wins_spend column with the project helper get_dist() (defined outside
# this view).
df_matus <- read.csv("/Users/PeterNovak/Desktop/ba_data.csv")

# Candidate distribution names and an equally long colour vector (presumably
# one colour per distribution -- confirm against get_dist()). The
# "tweedie"/"pink" pair is left disabled.
distribution_name <- c(
  "lnorm", "weibull", "frechet", "loglogis", "burr", "gamma", "exp"
) # , "tweedie")
colours <- c(
  "purple", "darkgreen", "dodgerblue", "orange", "red", "brown", "cyan"
) # , "pink")

# The three trailing logical flags are positional get_dist() arguments; their
# meaning is defined by get_dist() itself.
fit_matus <- get_dist(
  df_matus$total_wins_spend,
  distribution_name, colours, TRUE, FALSE, FALSE
)
# Recorded output of the call above:
## Warning in fitdist(data = input_list, method = "mle", distr = "frechet", : The
## pfrechet function should have its first argument named: q as in base R
## Warning in fitdist(data = input_list, method = "mle", distr = "loglogis", : The
## ploglogis function should have its first argument named: q as in base R
## Warning in fitdist(data = input_list, method = "mle", distr = "burr", start =
## list(a = 1, : The pburr function should have its first argument named: q as in
## base R

# Show the fit result as a data frame.
as.data.frame(fit_matus)
# Kolmogorov-Smirnov checks of total_wins_spend against three candidate
# parameterisations.
#
# Each call is a one-sample test: the second argument names the theoretical
# CDF ("pexp" / "plnorm") and the remaining arguments are its parameters.
# Testing against the CDF itself, rather than against a finite unseeded random
# sample drawn with rexp()/rlnorm(), keeps the statistic reproducible from run
# to run.
#
# Previously recorded output, from a two-sample run against 100000 random
# draws (each call also warned that the p-value is approximate in the presence
# of ties):
#   exp,   rate = 1.319                          D = 0.79521,  p-value < 2.2e-16
#   exp,   rate = 0.0289886                      D = 0.31311,  p-value < 2.2e-16
#   lnorm, meanlog = 2.21066, sdlog = 1.516598   D = 0.063347, p-value < 2.2e-16

# Your rate
ks.test(df_matus$total_wins_spend, "pexp", rate = 1.319)

# My best rate
ks.test(df_matus$total_wins_spend, "pexp", rate = 0.0289886)

# My best distribution
ks.test(df_matus$total_wins_spend, "plnorm",
        meanlog = 2.210660, sdlog = 1.516598)
# Number handed to descdist() as its `boot` argument for every client below.
n_boots <- 500

# Per-client data sets, read with the project helper load_df() (defined
# outside this view).
df_bingo_aloha   <- load_df("./data/data_bingo_aloha_30.csv")
df_homw          <- load_df("./data/data_homw_30.csv")
df_idle_mafia    <- load_df("./data/data_idle_mafia_30.csv")
df_spongebob     <- load_df("./data/data_spongebob_30.csv")
df_terra_genesis <- load_df("./data/data_terra_genesis_30.csv")
df_ultimex       <- load_df("./data/data_ultimex_30.csv")

# Run descdist() on each client's total_wins_spend, in the same client order
# as the loads above.
dist_bingo_aloha <- descdist(
  df_bingo_aloha$total_wins_spend,
  boot = n_boots
)
dist_homw <- descdist(
  df_homw$total_wins_spend,
  boot = n_boots
)
dist_idle_mafia <- descdist(
  df_idle_mafia$total_wins_spend,
  boot = n_boots
)
dist_spongebob <- descdist(
  df_spongebob$total_wins_spend,
  boot = n_boots
)
dist_terra_genesis <- descdist(
  df_terra_genesis$total_wins_spend,
  boot = n_boots
)
dist_ultimex <- descdist(
  df_ultimex$total_wins_spend,
  boot = n_boots
)
# Fit all seven candidate distributions to each client's total_wins_spend,
# after dividing the spend by `scale`.
distribution_name <- c(
  "lnorm", "weibull", "frechet", "loglogis", "burr", "gamma", "exp"
) # , "tweedie")
colours <- c(
  "purple", "darkgreen", "dodgerblue", "orange", "red", "brown", "cyan"
) # , "pink")

# Divisor applied to the spend: unscaled, the exp fit does not work because
# e^x is Inf once x > 710.
scale <- 100

# Recorded output: every one of the six get_dist() calls below emitted the same
# three warnings, reproduced once here.
## Warning in fitdist(data = input_list, method = "mle", distr = "frechet", : The
## pfrechet function should have its first argument named: q as in base R
## Warning in fitdist(data = input_list, method = "mle", distr = "loglogis", : The
## ploglogis function should have its first argument named: q as in base R
## Warning in fitdist(data = input_list, method = "mle", distr = "burr", start =
## list(a = 1, : The pburr function should have its first argument named: q as in
## base R

# The three trailing logical flags are positional get_dist() arguments; their
# meaning is defined by get_dist() itself (outside this view).
fit_bingo_aloha <- get_dist(
  df_bingo_aloha$total_wins_spend / scale,
  distribution_name, colours, TRUE, FALSE, FALSE
)
fit_homw <- get_dist(
  df_homw$total_wins_spend / scale,
  distribution_name, colours, TRUE, FALSE, FALSE
)
fit_idle_mafia <- get_dist(
  df_idle_mafia$total_wins_spend / scale,
  distribution_name, colours, TRUE, FALSE, FALSE
)
fit_spongebob <- get_dist(
  df_spongebob$total_wins_spend / scale,
  distribution_name, colours, TRUE, FALSE, FALSE
)
fit_terra_genesis <- get_dist(
  df_terra_genesis$total_wins_spend / scale,
  distribution_name, colours, TRUE, FALSE, FALSE
)
fit_ultimex <- get_dist(
  df_ultimex$total_wins_spend / scale,
  distribution_name, colours, TRUE, FALSE, FALSE
)
# Build a client x distribution table from the six per-client fit results.
raw_fit <- list(
  fit_bingo_aloha, fit_homw, fit_idle_mafia,
  fit_spongebob, fit_terra_genesis, fit_ultimex
)

# One row per client (same order as raw_fit), one column per distribution.
mat_out2 <- matrix(
  data = NA,
  nrow = 6,
  ncol = length(distribution_name),
  dimnames = list(
    c(
      "Bingo Aloha", "HOMW", "Idle Mafia",
      "Spongebob", "Terra Genesis", "Ultimate X-Poker"
    ),
    distribution_name
  )
)

# Row i takes the first ncol(mat_out2) elements of the i-th fit result, by
# linear index. NOTE(review): for a matrix with one row per distribution that
# is its first column -- presumably AIC; confirm against get_dist().
for (i in seq_len(nrow(mat_out2))) {
  temp_gof <- raw_fit[[i]]
  mat_out2[i, ] <- as.numeric(temp_gof[seq_len(ncol(mat_out2))])
}

as.data.frame(mat_out2)
# Stack the per-client fit results into one long table with a row for every
# (client, distribution) pair, then write it to CSV.
#
# For each client the row names of the fit result (used below as the
# "distribution" column) and a client label are bound in front of the fit
# values. The label is repeated nrow(fit_*) times so it always matches the
# number of rows in the fit result; a fixed count of 4 did not match and made
# every cbind() call warn "number of rows of result is not a multiple of vector
# length (arg 2)" while recycling the label.
val_mat <- as.data.frame(rbind(
  cbind(
    rownames(fit_bingo_aloha),
    rep("Bingo Aloha", nrow(fit_bingo_aloha)),
    fit_bingo_aloha
  ),
  cbind(
    rownames(fit_homw),
    rep("HOMW", nrow(fit_homw)),
    fit_homw
  ),
  cbind(
    rownames(fit_idle_mafia),
    rep("Idle Mafia", nrow(fit_idle_mafia)),
    fit_idle_mafia
  ),
  cbind(
    rownames(fit_spongebob),
    rep("Spongebob", nrow(fit_spongebob)),
    fit_spongebob
  ),
  cbind(
    rownames(fit_terra_genesis),
    rep("Terra Genesis", nrow(fit_terra_genesis)),
    fit_terra_genesis
  ),
  cbind(
    rownames(fit_ultimex),
    rep("Ultimate X-Poker", nrow(fit_ultimex)),
    fit_ultimex
  )
))

# Name the two prepended columns and replace the row names with 1..n.
colnames(val_mat)[1:2] <- c("distribution", "client")
rownames(val_mat) <- seq_len(nrow(val_mat))

# Binding labels next to the values turned the metric columns into text;
# convert them back to numbers (via character, so factor columns yield their
# labels rather than their integer codes).
for (metric in c("aic", "bic", "chsq", "ks")) {
  val_mat[[metric]] <- as.numeric(as.character(val_mat[[metric]]))
}

write.csv(val_mat, "./goodness_of_fit_per_client_raw.csv")
# Bar charts of each metric column of val_mat: one bar per distribution,
# one facet per client, facets laid out as 1 row x 6 columns.
# The ggsave() calls are intentionally left commented out.

# `aic` column
a <- ggplot(val_mat, aes(x = distribution, y = aic, fill = distribution)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~ client, nrow = 1, ncol = 6)
#ggsave("./aic_plot_all.png", plot = a,
# width = 36, height = 10, units = "cm", scale = 1.5
#)
a

# `bic` column
b <- ggplot(val_mat, aes(x = distribution, y = bic, fill = distribution)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~ client, nrow = 1, ncol = 6)
#ggsave("./bic_plot_all.png", plot = b,
# width = 36, height = 10, units = "cm", scale = 1.5
#)
b

# `chsq` column
# NOTE(review): this binds a plot to the name `c`, which also names base::c();
# the name is kept because code outside this view may refer to it.
c <- ggplot(val_mat, aes(x = distribution, y = chsq, fill = distribution)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~ client, nrow = 1, ncol = 6)
#ggsave("./chsq_plot_all.png", plot = c,
# width = 36, height = 10, units = "cm", scale = 1.5
#)
c

# `ks` column
d <- ggplot(val_mat, aes(x = distribution, y = ks, fill = distribution)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~ client, nrow = 1, ncol = 6)
#ggsave("./ks_plot_all.png", plot = d,
# width = 36, height = 10, units = "cm", scale = 1.5
#)
d
All five of the distributions used below are right-skewed (most of the mass sits on the left, with a long right tail) and have similar properties, so the results are bound to be similar.
# Fit five candidate distributions (lnorm, weibull, frechet, loglogis, burr)
# to each client's total_wins_spend.
distribution_name <- c("lnorm", "weibull", "frechet", "loglogis", "burr")
colours <- c("purple", "darkgreen", "dodgerblue", "orange", "red")

# Divisor applied to the spend; 1 leaves the values unchanged.
scale <- 1

# Recorded output: every one of the six get_dist() calls below emitted the same
# three warnings, reproduced once here.
## Warning in fitdist(data = input_list, method = "mle", distr = "frechet", : The
## pfrechet function should have its first argument named: q as in base R
## Warning in fitdist(data = input_list, method = "mle", distr = "loglogis", : The
## ploglogis function should have its first argument named: q as in base R
## Warning in fitdist(data = input_list, method = "mle", distr = "burr", start =
## list(a = 1, : The pburr function should have its first argument named: q as in
## base R

# The three trailing logical flags are positional get_dist() arguments; their
# meaning is defined by get_dist() itself (outside this view).
fit_bingo_aloha <- get_dist(
  df_bingo_aloha$total_wins_spend / scale,
  distribution_name, colours, TRUE, FALSE, FALSE
)
fit_homw <- get_dist(
  df_homw$total_wins_spend / scale,
  distribution_name, colours, TRUE, FALSE, FALSE
)
fit_idle_mafia <- get_dist(
  df_idle_mafia$total_wins_spend / scale,
  distribution_name, colours, TRUE, FALSE, FALSE
)
fit_spongebob <- get_dist(
  df_spongebob$total_wins_spend / scale,
  distribution_name, colours, TRUE, FALSE, FALSE
)
fit_terra_genesis <- get_dist(
  df_terra_genesis$total_wins_spend / scale,
  distribution_name, colours, TRUE, FALSE, FALSE
)
fit_ultimex <- get_dist(
  df_ultimex$total_wins_spend / scale,
  distribution_name, colours, TRUE, FALSE, FALSE
)
# Build a client x distribution table from the six per-client fit results.
raw_fit <- list(
  fit_bingo_aloha, fit_homw, fit_idle_mafia,
  fit_spongebob, fit_terra_genesis, fit_ultimex
)

# One row per client (same order as raw_fit), one column per distribution.
mat_out2 <- matrix(
  data = NA,
  nrow = 6,
  ncol = length(distribution_name),
  dimnames = list(
    c(
      "Bingo Aloha", "HOMW", "Idle Mafia",
      "Spongebob", "Terra Genesis", "Ultimate X-Poker"
    ),
    distribution_name
  )
)

# Row i takes the first ncol(mat_out2) elements of the i-th fit result, by
# linear index. NOTE(review): for a matrix with one row per distribution that
# is its first column -- presumably AIC; confirm against get_dist().
for (i in seq_len(nrow(mat_out2))) {
  temp_gof <- raw_fit[[i]]
  mat_out2[i, ] <- as.numeric(temp_gof[seq_len(ncol(mat_out2))])
}

as.data.frame(mat_out2)
# Stack the per-client fit results into one long table with a row for every
# (client, distribution) pair.
#
# For each client the row names of the fit result (used below as the
# "distribution" column) and a client label, repeated once per row of that
# result, are bound in front of the fit values.
val_mat <- as.data.frame(rbind(
  cbind(
    rownames(fit_bingo_aloha),
    rep("Bingo Aloha", nrow(fit_bingo_aloha)),
    fit_bingo_aloha
  ),
  cbind(
    rownames(fit_homw),
    rep("HOMW", nrow(fit_homw)),
    fit_homw
  ),
  cbind(
    rownames(fit_idle_mafia),
    rep("Idle Mafia", nrow(fit_idle_mafia)),
    fit_idle_mafia
  ),
  cbind(
    rownames(fit_spongebob),
    rep("Spongebob", nrow(fit_spongebob)),
    fit_spongebob
  ),
  cbind(
    rownames(fit_terra_genesis),
    rep("Terra Genesis", nrow(fit_terra_genesis)),
    fit_terra_genesis
  ),
  cbind(
    rownames(fit_ultimex),
    rep("Ultimate X-Poker", nrow(fit_ultimex)),
    fit_ultimex
  )
))

# Name the two prepended columns and replace the row names with 1..n.
colnames(val_mat)[1:2] <- c("distribution", "client")
rownames(val_mat) <- 1:nrow(val_mat)

# Binding labels next to the values turned the metric columns into text;
# convert them back to numbers (via character, so factor columns yield their
# labels rather than their integer codes).
for (metric in c("aic", "bic", "chsq", "ks")) {
  val_mat[[metric]] <- as.numeric(as.character(val_mat[[metric]]))
}

# CSV export is disabled for this run.
#write.csv(val_mat, "./goodness_of_fit_per_client_raw.csv")
# Bar charts of each metric column of val_mat: one bar per distribution,
# one facet per client, facets laid out as 1 row x 6 columns.
# The ggsave() calls are intentionally left commented out.

# `aic` column
a <- ggplot(val_mat, aes(x = distribution, y = aic, fill = distribution)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~ client, nrow = 1, ncol = 6)
#ggsave("./aic_plot_all.png", plot = a,
# width = 36, height = 10, units = "cm", scale = 1.5
#)
a

# `bic` column
b <- ggplot(val_mat, aes(x = distribution, y = bic, fill = distribution)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~ client, nrow = 1, ncol = 6)
#ggsave("./bic_plot_all.png", plot = b,
# width = 36, height = 10, units = "cm", scale = 1.5
#)
b

# `chsq` column
# NOTE(review): this binds a plot to the name `c`, which also names base::c();
# the name is kept because code outside this view may refer to it.
c <- ggplot(val_mat, aes(x = distribution, y = chsq, fill = distribution)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~ client, nrow = 1, ncol = 6)
#ggsave("./chsq_plot_all.png", plot = c,
# width = 36, height = 10, units = "cm", scale = 1.5
#)
c

# `ks` column
d <- ggplot(val_mat, aes(x = distribution, y = ks, fill = distribution)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~ client, nrow = 1, ncol = 6)
#ggsave("./ks_plot_all.png", plot = d,
# width = 36, height = 10, units = "cm", scale = 1.5
#)
d